$title  demand.GMS

* ###################################

* Per capita income, consumption patterns, and CO2 emissions

* Journal of the Association of Environmental and Resource Economists

* Justin Caron and Thibault Fally

* April 2021

*###################################


* ESTIMATE DEMAND SYSTEM PARAMETERS - NH CES DEMAND SYSTEM 


$set SLASH \
$if %system.filesys% == UNIX $set SLASH /


$if not set datadir $set datadir "data%SLASH%"
$setglobal datadir %datadir%


$if not set spec $set spec theta4

* ---------------------------------------------------------------------------------------------
* OPTIONS :

* subset of regions to include
$if not set regsubset $set regsubset rall

* MINIMIZE ERRORS ON LOGS OR CONSUMPTION SHARES ?
* choices : log, consshare, logweighted
$ if not set objective $set objective logweighted

* load gravity data from stata or gams ?
* choices : stata, gams
$ if not set gravitydata $set gravitydata gams

* skip reporting ?
* choices : yes, no
$ if not set skipreporting $set skipreporting yes

* BOOSTRAP ?
$if not set boot $set boot no
* set nb of boostrap iterations:
$if not set itr $set itr 4

* with gas and gdt aggregated
$if not set ds $set ds gtap8gas

*$include loaddata.gms
$include loaddata_gtap8.gms


* ------------------------------------
* IMPORT  GRAVITY ESTIMATES

parameter       coeffs stores all sector-specific coefficients
                phiest
                tcostest
                logphiest
                IM
                EX
                cst;


*$gdxin estimates\gravityestimates_%ds%_internationaltrade.gdx
$gdxin estimates\gravityestimates_%ds%.gdx
$load  coeffs phiest im ex cst tcostest

parameter esttheta;
esttheta(i) = 0;

display coeffs;

* --  DEFINE SECTORS WHICH ARE MOSTLY INTERMEDIATES

parameter sharefd % FD in vdm (dom output);
set intermediates(i);

sharefd(i) = (sum((r,g), vdfm(i,g,r) + vifm(i,g,r)) - sum((j,r), vdfm(i,j,r) + vifm(i,j,r)))/ sum((r,g), vdfm(i,g,r) + vifm(i,g,r));

* defined as "intermediate" if  less than 10 % of production goes to final demand
intermediates(i)$(sharefd(i)<0.1) = yes;
display sharefd, intermediates;

* -- DEFINE SERVICE AND TRADABLE SECTORS SECTORS

*set     serv(i) service sectors / CMN, DWE,ISR,OBS, OFI,OSG,ROS,WTR,TRD,CNS, OTP, ATP, WTP, GDT, ELY/
set     serv(i) service sectors /osg/
        tradables(i) the tradable sectors;

tradables(i) = yes;
tradables(serv) = no;
display tradables;


* -----------------------------------------------------------
* NLLS Demand estimations
* -----------------------------------------------------------
* the only exogenous parameters needed are: x(i,r), per capita expenditure, w(n) = wage = PCI


parameter       x(i,r)          per capita expenditure
                w(r)            wage = PCI
                indexp(i)       industry total expenditure,
                logphi,
                phi
                bhat;

set i_(i), r_(r),g_(g),
        rall(r) set of 94 regions;
  i_(i) = no;


$if "%gravitydata%" == "gams"  phi(i,r) = phiest(i,r);


* -- SELECT WHICH SECTORS TO USE HERE :

* using all sectors for which we have estimates of PHI :
i_(i)$sum(r,phi(i,r))= yes;

* selecting regions :
rall(r)$sum(i,phi(i,r)) = yes;


* use this parameter in excel to get this set of regions :
set rskm(r) regions with skill ratio ensowments within the 2 middle quartiles
   /MYS, VNM, GEO,KAZ, EGY, ALB, PAK, NGA, SEN, THA, PHL, TUR, ZWE, LTU, COL, LVA, SVK, KOR, TUN, ARG, BOL, HRV, HUN, VEN, MLT, RUS, CZE, SVN, CHL, UKR, MUS, CRI, PAN, IRN, EST, POL,
  NZL, CYP, URY, BRA, ZAF, SGP, CAN, BWA, BGR/ ;
* note, added bgr to estimate median income country elasticities

* other sets :

set red(r) / ALB, ARM, AZE, BGD, BGR, BLR, BOL, CHN, COL, ECU, EGY, GEO, GTM, IDN, IND, KAZ, KGZ, LKA, MAR, MYS, NGA, NIC, PAK, PER, PHL, PRY, ROU, SEN, THA, TUR, UKR, VNM, ZMB, ZWE /;

set blue(r) / ARG, BRA, BWA, CHL, CRI, CYP, CZE, EST, HRV, HUN, IRN, KOR, LTU, LVA, MEX, MLT, MUS, NZL, PAN, POL, RUS, SVK, SVN, TUN, URY, VEN, ZAF /;

set green(r) / AUS, AUT, BEL, CAN, CHE, DEU, ESP, FIN, FRA, GBR, GRC, HKG, IRL, ITA, JPN, NLD, PRT, SGP, SWE, TWN /;



set nored(r) / ARG, AUS, AUT, BEL, BRA, BWA, CAN, CHE, CHL, CRI, CYP, CZE, DEU, DNK, ESP, EST, ETH, FIN, FRA, GBR,
GRC, HKG, HRV, HUN, IRL, IRN, ITA, JPN, KHM, KOR, LAO, LTU, LUX, LVA, MDG, MEX, MLT,  MOZ, MUS, MWI, NLD, NOR,
NZL, PAN, POL, PRT, RUS, SGP, SVK, SVN, SWE, TUN, TWN, TZA, UGA, URY, USA, VEN, ZAF /;


set noblue(r) / ALB, ARM, AUS, AUT, AZE, BEL, BGD, BGR, BLR, BOL, CAN, CHE, CHN, COL, DEU, DNK, ECU, EGY, ESP, ETH,
FIN, FRA, GBR, GEO, GRC, GTM, HKG, IDN, IND, IRL, ITA, JPN, KAZ, KGZ, KHM, LAO, LKA, LUX, MAR, MDG,  MOZ, MWI,
MYS, NGA, NIC, NLD, NOR, PAK, PER, PHL, PRT, PRY, ROU, SEN, SGP, SWE, THA, TUR, TWN, TZA, UGA, UKR, USA, VNM, ZMB, ZWE /;


set nogreen(r) / ALB, ARG, ARM, AZE, BGD, BGR, BLR, BOL, BRA, BWA, CHL, CHN, COL, CRI, CYP, CZE, DNK, ECU, EGY, EST, ETH,
GEO, GTM, HRV, HUN, IDN, IND, IRN, KAZ, KGZ, KHM, KOR, LAO, LKA, LTU, LUX, LVA, MAR, MDG, MEX, MLT,  MOZ, MUS, MWI,
MYS, NGA, NIC, NOR, NZL, PAK, PAN, PER, PHL, POL, PRY, ROU, RUS, SEN, SVK, SVN, THA, TUN, TUR, TZA, UGA, UKR, URY, USA,
VEN, VNM, ZAF, ZMB, ZWE /;



* trick to have r_ defined as "base" set - need that for bootstrapping, cant use ORD on an unstable set
execute_unload 'setr.gdx', rall, red, blue, green, rskm;

$gdxin setr

* IF USING RED BLUE GREEN SETS, USE THIS PART OF THE CODE :
$if "%regsubset%"=="red" $load r_=red
$if "%regsubset%"=="blue" $load r_=blue
$if "%regsubset%"=="green" $load r_=green
$if "%regsubset%"=="rall" $load r_=rall
$if "%regsubset%"=="riq" $load r_=riq
$if "%regsubset%"=="rskm" $load r_=rskm


display r_;


i_(intermediates) = no;

i_("coa") = yes;
i_("gas") = yes;


*i_(i) = no;

i_("p_c") = yes;
i_("coa") = yes;
i_("gas") = yes;
i_("obs") = yes;
i_("ofi") = yes;
i_("trd") = yes;

* DROPPING DWELLINGS :
i_("dwe") = no;


display i,i_, intermediates;
alias(i_,j_);
alias(r_,s_);

* define new g set
g_(i_) = yes; g_("c") = yes; g_("g") = yes; g_("i") = yes;


parameter nbr, nbi;
nbr = card(r_);
nbi = card(i_);

display nbr, nbi;

display r_, i_;


* definine per-capita expenditure only on selected sectors:
w(r)$pop(r) = 10e8* sum(i_,fd(i_,r)) /pop(r);
x(i,r)$pop(r)  = 10e8 * fd(i,r) / pop(r);
indexp(i)  = sum(r_,fd(i,r_)) / sum((r_,i.local),fd(i,r_));


display w;


* ALT WEIGHING SCHEMES, CHOSE HERE:

*	 maximum exp share
indexp(i)  = smax(r_, x(i,r_)/w(r_));

logphi(i,r)$phi(i,r) =log(phi(i,r));


* ignoring observations with very low shares of coal and gas

parameter expshare, sectdrop;
expshare(i_,r_) = fd(i_,r_) / sum(i_.local,fd(i_,r_));

sectdrop(i_,r_) = 1;

*   DROP in reporting at least.. (matters for R2)
sectdrop(i_,r_) = 1;
sectdrop(i_,r_)$(x(i_,r_) = 0 ) =0 ;


* compute some statistics
parameter       fittedPCexp     fitted per capita expenditure
                fittedexp       fitted expenditure
*                fittedexpshare  fitted expenditure share
                sstot total sum of squares
                rsquared
                nobs number of observations
                Fstat F-test statistic
                nbp number of parameters in model
                df degrees of freedom for Fstat
                sigma2hat estimated variance of regression
                modelselection;


* ----------------------------------------------
* for bootstrapping :

set boot /1*%itr%/;
option seed=081567;

parameter  wt(r)        weight in the objective
           bootcoef(boot,*,*)
           dim          number of ctries
           rdraw        random draw from the pool of ctries
           cardzz(r)    index on each observation
           wtchk        weight check
;
wt(r_)=1;

alias (r_,k,kk,z,zz);
*cardzz(k,kk)=(ord(k)-1)*card(k) + ord(kk);
* PROBLEM CANNOT USE ORD ON AN "UNSTABLE SET", see correction "trick" above
cardzz(k) = ord(k);
dim=card(k);
display dim, cardzz;


* which country-sector pairs are missing
set missingx(i,r);

missingx(i_,r_) = yes;
missingx(i_,r_)$x(i_,r_) = no;

display missingx;
parameter fe_norm(i);
fe_norm(i) = indexp(i)* 10;
fe_norm("pdr") = indexp("pdr") / 100;

set lowincome(r);


lowincome(r_)$(log(w(r_)) < 8.65) = 1;

set	se(i)	secondary energy        /coa,ely,gas, p_c/  ;
set	nse(i)  non-energy goods;

nse(i_) = yes;
nse(se) = no;



* ---------------------------------------------------------
* -- DEFINE MODEL :

variables sse_;

Positive variables       U_(r), theta_, delta_, b_  ;

variables rho_, nu_, x_;



VARIABLES mu_(i), sigma_, epsilondiff_(i),  fe_(i) the LOGGED fixed effect, commonrho_  ;

equations  obj_log, obj_consshare, obj_logweighted, budget, commontheta,  epsilon_fact, commondelta, est_theta, sigmadiff, positive_g_cstr, u_cstr, 
commonrho;


* -----------------------
* CLM ESTIMATION


* this is the CLM objective function
obj_logweighted.. sse_ =e=      sum((i_,r_)$x(i_,r_),

 sectdrop(i_,r_)*
                indexp(i_) *
                 wt(r_) * sqr(log(x(i_,r_)) - (
 sigma_ * log(w(r_)) +
 mu_(i_)*logphi(i_,r_)  
 +
* Note: here we redefined g to include ** (1/1- sigma)  -- BUT, this means that the constraint on g (below), must include sigma.
* also, means we should have no non-negativity constraints on rho and nu
* here rho = epsilon - sigma
 (  fe_(i_)*fe_norm(i_) + rho_(i_)   * log(U_(r_)) - nu_(i_)  * log(U_(r_))*log(U_(r_)) ) )));

* constraint on mu
commontheta(i_) ..      mu_(i_)  =e= ( sigma_ -1) / theta_;

* for homoth version
commonrho(i_) ..      rho_(i_)  =e= rho_("obs") ;

positive_g_cstr(i_,R_) ..  ( rho_(i_) - 2* log(U_(R_)) * nu_(i_)) / (1-sigma_)  =g= 0 ;

budget(r_) ..   sum(i_, exp( 
 sigma_ * log(w(r_)) +
 mu_(i_)*logphi(i_,r_)  
 +
 (  fe_(i_)*fe_norm(i_) + rho_(i_)  * log(U_(r_)) - nu_(i_)  * log(U_(r_))*log(U_(r_)) )
)) =e= w(r_);


* initialize variables:
sse_.L = 0;

epsilondiff_.L(i_) = 0.25;
epsilondiff_.UP(i_) = 1e3;

* fix one epsilon_ to one (textiles: one that is not a service industry) :
* solution is indep of this choice (if none of the bounds are binding)

* CLM: can on only normalize one of lambda and sigma

U_.L(r_)= 10;
* note, if sigma > 1 than the lower bound becomes important..
U_.LO(r_)= 1E-4;
U_.UP(r_)= 1E3;


* TURN OFF QUAD TERM:
nu_.FX(i_) = 0;

rho_.L(i_) = 5;
rho_.UP(i_) = 1E2;
rho_.LO(i_) =-1E3;


fe_.L(i_) = - 50;
fe_.LO(i_) = -1E5;
fe_.UP(i_) = 1E2;


mu_.UP(i_) =  1e2;
mu_.L(i_) =  0.5;
mu_.LO(i_) =  -1e3;



sigma_.L = 1.3;
sigma_.LO = 1.1;

x_.LO(i_,R_) = 1e-6;
x_.UP(i_,R_) = 1e4;

theta_.L = 1;
theta_.UP = 30;



* should be secondary energy only
set ene(i) / coa, gas, ely, p_c /;

parameter forstata;

forstata("fd",i_,r_) = x(i_,r_);
forstata("fd","all ene",r_) = sum(ene, x(ene,r_));



forstata("logpcfd",i_,r_)$x(i_,r_) = log(x(i_,r_));
forstata("logpcfd","all ene",r_) = log(forstata("fd","all ene",r_));

forstata("logpci",i_,r_) = log(w(r_));
forstata("logpci","all ene",r_) = log(w(r_));

* this is total expenditure for the country (for potential weighting)
forstata("total exp",i_,r_) = expenditure(r_);
forstata("total exp","all ene",r_) = expenditure(r_);



parameter  lambda, mu,  sse, fe, theta, epsilon_scale, eta, avgmu;
theta=0;
epsilon_scale=0;
parameter specificationstats for reporting;

* -- define specification :

* FOR CLM, USE THESE:

$if "%spec%"=="tc"  model nlls /obj_%objective%, budget, positive_g_cstr /; 
$if "%spec%"=="tc" nbp("non-homoth")= card(r_) + 3*card(i_) + 1;


$if "%spec%"=="theta4" theta_.FX = 4;  model nlls /obj_%objective%, commontheta , budget, positive_g_cstr /; 
$if "%spec%"=="theta4_noenergy" rho_.FX(ene)=-1; theta_.FX = 4; eta_.FX(i_) = 0; model nlls /obj_%objective%, commontheta , budget, positive_g_cstr /; 
$if "%spec%"=="theta4" nbp("non-homoth")= card(r_) + 2*card(i_) + 1 ;


* -----------------------

nlls.reslim = 100000;

* -- Solve model :

$ontext
* change solver tolerance ?
nlls.optfile = 1 ;
FILE  OPT   conopt option file  / conopt.OPT /;
PUT OPT;
PUT 'rtredg  3.0e-13' /
 'rtobjr 3.0e-14'/
'rtpiva 1.0e-14'/
'rtpivt 1.0e-12'
PUTCLOSE OPT;
$offtext


* make a savepoint for boostraping
nlls.savepoint=1;
solve nlls using nlp minimizing sse_;
nlls.savepoint=0;



$if "%spec%"=="theta4_noenergy" display sse_.l; 
$if "%spec%"=="theta4_noenergy" $exit


* CHANGING TO CLM NAMES:
* calculate statistics
coeffs("Theta","coeff", i_) = theta_.L;
coeffs("Epsilon","coeff", i_) = sigma_.L + epsilondiff_.L(i_);
coeffs("Epsilondiff","coeff", i_) = epsilondiff_.L(i_);
coeffs("Sigma","coeff", i_) = sigma_.L;
coeffs("fixed effect","coeff", i_) = fe_.L(i_);

coeffs("rho","coeff", i_) = rho_.L(i_);
coeffs("nu","coeff", i_) = nu_.L(i_);

coeffs("mu","coeff", i_) = mu_.L(i_);


forstata("fitted nh",i_,r_) = 
exp( 
 sigma_.L * log(w(r_)) +
 mu_.L(i_)*logphi(i_,r_)  
 +
 (  fe_.L(i_)*fe_norm(i_) + rho_.L(i_) * log(U_.L(r_)) - nu_.L(i_)  * log(U_.L(r_))*log(U_.L(r_)) )
);

* drop gas and coal if not in the original data
forstata("fitted nh",i_,r_)$(not sectdrop(i_,r_)) = 0; 

* note: estimated rho and nu include the (1-sigma) term
forstata("incel",i_,r_)$x(i_,r_) = sigma_.L + 
  (1 - sigma_.L) * 
 (rho_.L(i_) - 2* nu_.L(i_) * log(U_.L(r_))) * sum(i_.local, x(i_,r_)) /
   sum(i_.local,  (rho_.L(i_) - 2* nu_.L(i_) * log(U_.L(r_))) * x(i_,r_));


forstata("incel fitted dem",i_,r_)$x(i_,r_) = sigma_.L + 
  (1 - sigma_.L) * 
 (rho_.L(i_) - 2* nu_.L(i_) * log(U_.L(r_))) * sum(i_.local, forstata("fitted nh",i_,r_) ) /
   sum(i_.local,  (rho_.L(i_) - 2* nu_.L(i_) * log(U_.L(r_))) * forstata("fitted nh",i_,r_) );

parameter incelast, avgincelast;

incelast("DIRECT",i_,r_) = forstata("incel",i_,r_);
incelast("DIRECT fitted dem",i_,r_) = forstata("incel fitted dem",i_,r_);



* weighted by final deman
avgincelast("direct","energy goods","all") = sum((r_,se), fd(se,r_) * IncElast("direct",se,r_)) /
	sum((r_,se), fd(se,r_) ) ;

avgincelast("direct","energy goods","lowincome") = sum((lowincome,se), fd(se,lowincome) * IncElast("direct",se,lowincome)) /
	sum((lowincome,se), fd(se,lowincome) ) ;


* elast of g to U
forstata("elast of g to U",i_,r_)$x(i_,r_) =  (rho_.L(i_) - 2* nu_.L(i_) * log(U_.L(r_)) ) /  (1-sigma_.L);


parameter fittedexp; 

fittedexp("non-homoth",i_,r_) = forstata("fitted nh",i_,r_) / 10e8* pop(r_)  ;

* FOR REPORTING, CHOSE WHETHER TO USE WEIGHTED OR UNWEIGHTED R-SQUARED
parameter indexp_report;
*indexp_report(i_) = 1;
* if using  weighted:
indexp_report(i_) = indexp(i_);

sse_.L   = sum((i_,r_)$x(i_,r_), sectdrop(i_,r_) *  indexp_report(i_) *  sqr(log(x(i_,r_)) - log(forstata("fitted nh",i_,r_) )));

nobs = card(i_) * card(r_);


* calculate the average elasticity of expenditures w.r.t phi :
* weighted by sectors size
avgmu("weighted") = sum(i_, indexp_report(i_) * mu_.L(i_)) / sum(i_, indexp_report(i_));
avgmu("weighted - energy goods") = sum(se, indexp_report(se) * mu_.L(se)) / sum(se, indexp_report(se));


avgmu("unweighted") = sum(i_,   mu_.L(i_)) / card(i_);

* calculate weighted Rsquare for logweighted specification
parameter weightedsstot, weightedrsquared;

weightedrsquared = 0;
weightedsstot = 0;

sse("non-homoth") = sse_.L;

sstot = sum((i_,r_)$x(i_,r_), sectdrop(i_,r_) *  indexp_report(i_) *  sqr(log(x(i_,r_)) - sum((i_.local,r_.local)$x(i_,r_), log(x(i_,r_)))/nobs  ));

rsquared("nh","all","total") = 1 - sse_.L / sstot;

* by sector
rsquared("nh","all",i_) = 1 -   sum((r_)$(x(i_,r_) and sectdrop(i_,r_)), sectdrop(i_,r_) *  indexp_report(i_) * sqr(log(forstata("fitted nh",i_,r_)) -  log(x(i_,r_))  ))
 / 
  sum((r_)$x(i_,r_), sectdrop(i_,r_) *  indexp_report(i_) * sqr(log(x(i_,r_)) - sum((r_.local)$x(i_,r_), log(x(i_,r_)))/card(r_)  ))
;

* by sector
rsquared("nh","lowincome",i_) = 1 -   sum((lowincome)$(x(i_,lowincome) and sectdrop(i_,lowincome)), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(forstata("fitted nh",i_,lowincome)) -  log(x(i_,lowincome))  ))
 / 
  sum((lowincome)$x(i_,lowincome), sectdrop(i_,lowincome) *  indexp_report(i_) *  sqr(log(x(i_,lowincome)) - sum((lowincome.local)$x(i_,lowincome), log(x(i_,lowincome)))/ (card(i_) * card(lowincome))  ))
;

* by sector
rsquared("nh","lowincome","total") = 1 -   sum((i_,lowincome)$(x(i_,lowincome) and sectdrop(i_,lowincome)), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(forstata("fitted nh",i_,lowincome)) -  log(x(i_,lowincome))  ))
 / 
  sum((i_,lowincome)$x(i_,lowincome), sectdrop(i_,lowincome) *  indexp_report(i_) *  sqr(log(x(i_,lowincome)) - sum((i_.local,lowincome.local)$x(i_,lowincome), log(x(i_,lowincome)))/ (card(i_) * card(lowincome))  ))
;


* energy goods only:
rsquared("nh","all","energy goods") = 1 -   sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) *  sqr(log(forstata("fitted nh",se,r_)) -  log(x(se,r_))  ))
 / 
  sum((se,r_)$x(se,r_), sectdrop(SE,r_) *  indexp_report(SE) *  sqr(log(x(se,r_)) - sum((se.local,r_.local)$x(se,r_), log(x(se,r_)))/ (card(se) * card(r_))  ))
;




modelselection("aic weighted","non-homoth") =  log(sse_.L /nobs) + 2* nbp("non-homoth") / nobs ;

* Schwarz = BIC

* in paper use this measure
modelselection("bic weighted","non-homoth") =  log(sse_.L /nobs) + log(nobs)* nbp("non-homoth") /nobs ;



* only for export -- will not be used
parameter lambda, sigma_scale;

lambda("non-homoth", r_) = U_.L(r_) ;
sigma_scale("non-homoth") = 0 ;
coeffs("backed out theta","coeff",i_) = 1;


* now estimating Homothetic version, i.e. CES..

* here, can either fix epsilon-sigma, or fix U
* decide to keep U fixed, and just impose all epsilon to be the same (rho in the model) (to keep U's comparable in level)

* note: rho is epsilon -sigma

* -- HOMOTHETIC VERSION

* re-ininitialize variables

U_.L(r_) = 1;
rho_.L(i_) = 1;

U_.LO(r_) = 0.1;
U_.UP(r_) = 1000;

U_.FX("USA") = 20;

rho_.UP(i_) = 100;
rho_.LO(i_) = -100;

sigma_.L = 1.6;
nu_.FX(i_) = 0;


*re-solve with sigmas fixed at one
*theta_.UP = 1E10;
* with no constraint on mu :
$if "%spec%"=="tc"  model nllshomoth /obj_%objective%,  budget, commonrho /; 
$if "%spec%"=="tc" nbp("homoth")= card(r_) + 2*card(i_) + 1 ;

$if "%spec%"=="theta4" theta_.FX = 4;  model nllshomoth /obj_%objective%, commontheta , budget, commonrho /; 
$if "%spec%"=="theta4" nbp("homoth")= card(r_) + card(i_) + 1;


*solve nllshomoth using nlp minimizing sse_;
* re-set to non-homoth for bootstrap


forstata("fitted h",i_,r_) = 
exp( 
 sigma_.L * log(w(r_)) +
 mu_.L(i_)*logphi(i_,r_)  
 +
 (  fe_.L(i_)*fe_norm(i_) + rho_.L(i_) * log(U_.L(r_)) - nu_.L(i_)  * log(U_.L(r_))*log(U_.L(r_)) )
);


* drop gas and coal if not in the original data
forstata("fitted h",i_,r_)$(not sectdrop(i_,r_)) = 0; 
fittedexp("homoth",i_,r_) = forstata("fitted h",i_,r_) / 10e8* pop(r_)  ;

***** for unweighted R2
sse_.L =  sum((i_,r_)$x(i_,r_), sectdrop(i_,r_) *  indexp_report(i_) *  sqr(log(x(i_,r_)) - log(forstata("fitted h",i_,r_) )));

modelselection("aic weighted","homoth") = log(sse_.L /nobs) + 2* nbp("homoth") / nobs;

modelselection("bic weighted","homoth") =  log(sse_.L /nobs) + log(nobs)* nbp("homoth") /nobs ;

* Rsquared

rsquared("h","all","total") = 1 - sse_.L / sstot;

* within sector
rsquared("h","all",i_) = 1 -   sum((r_)$(x(i_,r_) and sectdrop(i_,r_)), sectdrop(i_,r_) *  indexp_report(i_) * sqr(log(forstata("fitted h",i_,r_)) -  log(x(i_,r_))  ))
 / 
  sum((r_)$x(i_,r_),sectdrop(i_,r_) *  indexp_report(i_) *  sqr(log(x(i_,r_)) - sum((r_.local)$x(i_,r_), log(x(i_,r_)))/card(r_)  ))
;

* by sector
rsquared("H","lowincome",i_) = 1 -   sum((lowincome)$(x(i_,lowincome) and sectdrop(i_,lowincome)), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(forstata("fitted h",i_,lowincome)) -  log(x(i_,lowincome))  ))
 / 
  sum((lowincome)$x(i_,lowincome), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(x(i_,lowincome)) - sum((lowincome.local)$x(i_,lowincome), log(x(i_,lowincome)))/ (card(i_) * card(lowincome))  ))
;

* by sector
rsquared("h","lowincome","total") = 1 -   sum((i_,lowincome)$(x(i_,lowincome) and sectdrop(i_,lowincome)), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(forstata("fitted h",i_,lowincome)) -  log(x(i_,lowincome))  ))
 / 
  sum((i_,lowincome)$x(i_,lowincome), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(x(i_,lowincome)) - sum((i_.local,lowincome.local)$x(i_,lowincome), log(x(i_,lowincome)))/ (card(i_) * card(lowincome))  ))
;

* energy goods only:
rsquared("h","all","energy goods") = 1 -   sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(forstata("fitted h",se,r_)) -  log(x(se,r_))  ))
 / 
  sum((se,r_)$x(se,r_), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(x(se,r_)) - sum((se.local,r_.local)$x(se,r_), log(x(se,r_)))/ (card(se) * card(r_))  ))
;


* compute partial Rsquared for NH version
sse("homoth") = sse_.L;

Parameter withinrsquare;

withinrsquare("all sectors","to homoth") = 1 - (sse("non-homoth") / sse("homoth"));
withinrsquare("energy goods","to homoth") = 1 -   sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(forstata("fitted nh",se,r_)) -  log(x(se,r_))  ))
 /  sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(forstata("fitted h",se,r_)) -  log(x(se,r_))  ))
;

withinrsquare("non-energy goods","to homoth") = 1 -   sum((nse,r_)$(x(nse,r_) and sectdrop(nse,r_)), sectdrop(nSE,r_) *  indexp_report(nSE) * sqr(log(forstata("fitted nh",nse,r_)) -  log(x(nse,r_))  ))
 /  sum((nse,r_)$(x(nse,r_) and sectdrop(nse,r_)), sectdrop(nSE,r_) *  indexp_report(nSE) * sqr(log(forstata("fitted h",nse,r_)) -  log(x(nse,r_))  ))
;

withinrsquare(i_,"to homoth") = 1 -   sum((r_)$(x(i_,r_) and sectdrop(i_,r_)), sectdrop(i_,r_) *  indexp_report(i_) * sqr(log(forstata("fitted nh",i_,r_)) -  log(x(i_,r_))  ))
 /  sum((r_)$(x(i_,r_) and sectdrop(i_,r_)), sectdrop(i_,r_) *  indexp_report(i_) * sqr(log(forstata("fitted h",i_,r_)) -  log(x(i_,r_))  ))
;


* FOR HOMONOTC USING THE CRIE ESTIMATE! SHOULD BE THE SAME..
withinrsquare("to homoth notc","all") = 1 - (sse("non-homoth") /
964
);



withinrsquare("all sectors","to homoth notc") = 1 - (sse("non-homoth") / 964);
withinrsquare("energy goods","to homoth notc") = 1 -   sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(forstata("fitted nh",se,r_)) -  log(x(se,r_))  ))
 /  sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(forstata("fitted h",se,r_)) -  log(x(se,r_))  ))
;


* 1 - test restriction of mu = 0
* import from notc case manually


$if "%objective%"=="log"        sse("no TC imp") = 14003.3885458859  ;
$if "%objective%"=="consshare"  sse("no TC imp") = 1.51084988080653   ;
$if "%objective%"=="logweighted"  sse("no TC imp") = 1794672.8200  ;


nbp("no TC imp") = card(r_) + 2 *card(i_) ;

fstat("mu=0")$(nbp("non-homoth") - nbp("no TC imp"))  = ((sse("no TC imp") - sse("non-homoth"))
         / (nbp("non-homoth") - nbp("no TC imp")))
         / (sse("non-homoth")
         / ( nobs - nbp("non-homoth"))) ;

* this F stat has an F distribution, with (p2 - p1, n - p2) degrees of freedom
* according to Greene : only approximate in non-linear case, but good enough
df("1") = (nbp("non-homoth") - nbp("no TC imp"));
df("2") = nobs - nbp("non-homoth");

* so we need to compare to F(56,5002) .. at 5% significance level, that is about 1.35
* (according to http://www.ma.utexas.edu/users/davis/375/popecol/tables/f005.html)
* we get a value of 8.79 > 1.35

* 2 - test restriction of sigma = 1

fstat("sigma=1") = ((sse("homoth") - sse("non-homoth")) / (nbp("non-homoth") - nbp("homoth"))) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;


* for energy goods only: compute model with sigma = 1 for energy, get SSE and paste it here:
* run tc_noenergy specification
fstat("sigma=1 energy true") = ((708   - sse("non-homoth")) / (4)) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;
* so we need to compare to F(4,5002) .. at 5% significance level, that is about 1.9
* according to http://www.socscistatistics.com/pvalues/fdistribution.aspx  highly signif <0.00001

* alt computation because "true" doesn't work in the quadratic version
set nonene(i);
nonene(i_) = yes;
nonene(ene) = no;


parameter sse_noenergy, sse_energyonly;
sse_noenergy   = sum((ene,r_)$x(ene,r_), sectdrop(ene,r_) *  indexp_report(ene) *  sqr(log(x(ene,r_)) - log(forstata("fitted h",ene,r_) ))) +
sum((nonene,r_)$x(nonene,r_), sectdrop(nonene,r_) *  indexp_report(nonene) *  sqr(log(x(nonene,r_)) - log(forstata("fitted nh",nonene,r_) )));

sse_energyonly  = sum((ene,r_)$x(ene,r_), sectdrop(ene,r_) *  indexp_report(ene) *  sqr(log(x(ene,r_)) - log(forstata("fitted h",ene,r_) ))) ;

display sse_energyonly;

* approx
fstat("sigma=1 energy") = ((sse_noenergy   - sse("non-homoth")) / (4)) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;

* 3 - test restriction of mu = (sigma-1) / theta
* if high, mean mu is significantly different from (sigma-1) / theta in data

* import SSE of case with no-restriction (tc)
$if "%objective%"=="logweighted"  sse("TC imp") = 1515414.70042461;
nbp("TC imp") = card(r_) + 3 *card(i_) ;

fstat("commontheta")$(nbp("TC imp") - nbp("non-homoth")) = ((sse("non-homoth") - sse("TC imp")) / (nbp("TC imp") - nbp("non-homoth"))) / (sse("TC imp") / ( nobs - nbp("TC imp"))) ;

specificationstats("sigma") = sigma_.L;

specificationstats("avg incel energy goods") = avgincelast("direct","energy goods","all"); 
specificationstats("avg incel energy goods - low income ctries") = avgincelast("direct","energy goods","lowincome") ;


specificationstats("avgmu - weighted") = avgmu("weighted");
specificationstats("avgmu - weighted - energy goods") = avgmu("weighted - energy goods");
specificationstats("F-stat sigma=1") = fstat("sigma=1") ;
specificationstats("F-stat sigma=1 energy") = fstat("sigma=1 energy");
specificationstats("F-stat flex nh") = 100000000000000000000;
specificationstats("R2") = rsquared("nh","all","total");
specificationstats("R2 energy goods") = rsquared("nh","all","energy goods");
specificationstats("within R2") = withinrsquare("all sectors","to homoth") ;
specificationstats("within R2 energy goods") = withinrsquare("energy goods","to homoth");
specificationstats("aic weighted") = modelselection("aic weighted","non-homoth");
specificationstats("bic weighted") = modelselection("bic weighted","non-homoth");
specificationstats("p") = nbp("non-homoth");
specificationstats("n") = nobs;
specificationstats("df1") = df("1");
specificationstats("df2") = df("2");
specificationstats("sse unweighted") = sse("non-homoth");
specificationstats("sstot") = sstot;
specificationstats("avgmu - unweighted") = avgmu("unweighted") ;
specificationstats("bic unweighted") = modelselection("bic unweighted","non-homoth");
specificationstats("within Rsquare") = withinrsquare("to homoth","all");
specificationstats("within Rsquare (energy goods)") = withinrsquare("to homoth","energy goods");
specificationstats("theta") = theta;


execute_unload 'estimates\estimates_%ds%_%objective%_%spec%_CLM.gdx'     coeffs,U_, sigma_, epsilondiff_, forstata, weightedrsquared, rho_, nu_, fe_, mu_, ex, im, cst, lambda, sigma_scale, fittedexp, sectdrop, expshare, rsquared, incelast, withinrsquare, sse, modelselection, nbp, specificationstats, indexp_report, fstat;



